library(ggplot2movies)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(scales)
library(plotly) 
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(MASS)
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:plotly':
## 
##     select
## The following object is masked from 'package:dplyr':
## 
##     select
library(dplyr)
library(tidyr)
library(ggplot2)

data("movies")
ggplot(movies, aes(x=length)) + 
    xlim(0, 180) +
    geom_histogram(binwidth = 1) +
    xlab("Duración de películas en minutos") + 
    theme_bw()
## Warning: Removed 392 rows containing non-finite values (stat_bin).

Ejercicio 1 a.¿Qué puedes decir de esta gráfica? Pareciera que dentro de la base existen 2 grupos de películas unas que pudieran ser cortometrajeas ya que duran menos de 50 minutos otro grupo que tiene una media de duración entre 90 y 100 mins. que paree más el tiempo ‘común’ de duración.

b.¿Cómo la modificas para agregar más ticks?

#Para agregar más ticks vamos a usar la opción scale_x_continuous dentro de ggplot:
ggplot(movies%>%filter(length<180), aes(x=length)) + 
    xlim(0, 180) +
    geom_histogram(binwidth = 1) +
    xlab("Duración de películas en minutos") + 
    theme_bw() + scale_x_continuous(breaks = seq(0, 180, by = 10))
## Scale for 'x' is already present. Adding another scale for 'x', which
## will replace the existing scale.

c.Haz una gráfica que muestre que los picos de 7 y 90 minutos existían antes y después de 1980

#haremos una variable dummie que identifique antes y despues de 1980
moviesd<-movies%>%filter(length<180)%>%mutate(a80=ifelse(year>=1980,'despues_80','antes_80'))
ggplot(moviesd, aes(x=length)) + 
    xlim(0, 180) +
    geom_histogram(binwidth = 1) +
    xlab("Duración de películas en minutos") + 
    theme_bw() + scale_x_continuous(breaks = seq(0, 180, by = 10))+facet_wrap(~a80)
## Scale for 'x' is already present. Adding another scale for 'x', which
## will replace the existing scale.

d.Existe la varaible short que indica si una película es “corta”, ¿Qué gráfica puedes hacer para identificar el criterio que se ocupó para definir esta variable y cuáles están mal clasificadas?

#observando la gráfica partida por antes y después de 1980 pareciera que el criterio de que una película sea corta es de 50 mins porque aparece en el brazo inferior  antes de 1980
ggplotly(ggplot(data=moviesd)+geom_boxplot(aes(x=as.character(Short),y=length))+facet_wrap(~a80)) 
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
#si lo vemos por distribuciones parecieran la mismas conclusiones
ggplot(moviesd, aes(x=length)) + 
    xlim(0, 180) +
    geom_histogram(binwidth = 1) +
    xlab("Duración de películas en minutos") + 
    theme_bw() + scale_x_continuous(breaks = seq(0, 180, by = 10))+facet_wrap(~Short)
## Scale for 'x' is already present. Adding another scale for 'x', which
## will replace the existing scale.

#por tanto las peliculas mal clasificadas serían:
#de las que se clasifican como largas y son cortas:
mall<-movies%>%filter(Short==0 & length<50)
mall$title
##   [1] "1/2 Mensch"                                                           
##   [2] "Adamah"                                                               
##   [3] "Adelbert"                                                             
##   [4] "Affliction"                                                           
##   [5] "After"                                                                
##   [6] "Alaska"                                                               
##   [7] "Alefbay-e afghan"                                                     
##   [8] "Aliens of the Deep"                                                   
##   [9] "All the Girls I've Loved Before"                                      
##  [10] "Alla ricerca di Tadzio"                                               
##  [11] "Am See"                                                               
##  [12] "Anatomy of Desire"                                                    
##  [13] "Art Pepper: Notes from a Jazz Survivor"                               
##  [14] "Austernprinzessin, Die"                                               
##  [15] "Baldwin's Nigger"                                                     
##  [16] "Beatles at Shea Stadium, The"                                         
##  [17] "Being Ron Jeremy"                                                     
##  [18] "Berndi Broter und der Kasten der Katastrophen"                        
##  [19] "Best Hotel on Skid Row"                                               
##  [20] "Beyond Gravity"                                                       
##  [21] "Big Freeze, The"                                                      
##  [22] "Big Time"                                                             
##  [23] "Black Ships, The"                                                     
##  [24] "Blinde Fotograaf, De"                                                 
##  [25] "Blood: The Last Vampire"                                              
##  [26] "Bondage Queen Kate"                                                   
##  [27] "Breath"                                                               
##  [28] "Brev til Jonas"                                                       
##  [29] "Brief History of Errol Morris, A"                                     
##  [30] "Cadet Classification"                                                 
##  [31] "Cai shu zhi huang sao qian jun"                                       
##  [32] "Candidature"                                                          
##  [33] "Cane Toads"                                                           
##  [34] "Celluloid Dreams"                                                     
##  [35] "Change Up"                                                            
##  [36] "Chatte andalouse, La"                                                 
##  [37] "Chilean Gothic"                                                       
##  [38] "China: The Panda Adventure"                                           
##  [39] "Chirin no suzu"                                                       
##  [40] "Cockfight"                                                            
##  [41] "Comment font les gens"                                                
##  [42] "Corps ouverts, Les"                                                   
##  [43] "Dark Shadows Bloopers"                                                
##  [44] "Denchu Kozo no boken"                                                 
##  [45] "Dive"                                                                 
##  [46] "Dolphins"                                                             
##  [47] "Doroga k zvezdam"                                                     
##  [48] "Down to Hell"                                                         
##  [49] "Dragon Ball Z 6: Gekitotsu!! Hyakuoku power no senshitachi"           
##  [50] "Dragon Ball Z 7: Kyokugen battle!! San dai super saiyajin"            
##  [51] "Dreamwood"                                                            
##  [52] "Drifters"                                                             
##  [53] "Ducktators"                                                           
##  [54] "Eat"                                                                  
##  [55] "Eat Your Makeup"                                                      
##  [56] "Elvis & June: A Love Story"                                           
##  [57] "Eternal Embrace"                                                      
##  [58] "Fiesta"                                                               
##  [59] "Fighting Chance, The"                                                 
##  [60] "Flaming Creatures"                                                    
##  [61] "Footfalls"                                                            
##  [62] "Franzmann"                                                            
##  [63] "Fuoco (la favilla - la vampa - la cenere), Il"                        
##  [64] "Futtock's End"                                                        
##  [65] "Game of Death"                                                        
##  [66] "Gary Cooper: American Life, American Legend"                          
##  [67] "Genesis: A Band in Concert"                                           
##  [68] "Ghosts of the Abyss"                                                  
##  [69] "Grays, The"                                                           
##  [70] "Group Madness"                                                        
##  [71] "Hang Your Hat On the Wind"                                            
##  [72] "Held Hostage in Colombia"                                             
##  [73] "Hemmeligheder"                                                        
##  [74] "Hidden Dimension, The"                                                
##  [75] "Hinter Schloss und Riegel"                                            
##  [76] "His First Flame"                                                      
##  [77] "Histoire d'un crime"                                                  
##  [78] "Hit & Run"                                                            
##  [79] "Home of Your Own, A"                                                  
##  [80] "Homme atlantique, L'"                                                 
##  [81] "Homme qui marche"                                                     
##  [82] "Horny Vampire, The"                                                   
##  [83] "Horse with the Flying Tail, The"                                      
##  [84] "Humanoids from Atlantis"                                              
##  [85] "Hummingbird"                                                          
##  [86] "Hvor mindets blomster gror"                                           
##  [87] "Hypocrites"                                                           
##  [88] "I Am Not What You Want"                                               
##  [89] "I Wanna Be a Porn Star!"                                              
##  [90] "Im Anfang war der Blick"                                              
##  [91] "Imma youjo II"                                                        
##  [92] "Impressionen unter Wasser"                                            
##  [93] "In Smog and Thunder"                                                  
##  [94] "In the Land of the Head Hunters"                                      
##  [95] "In the Line of Fire"                                                  
##  [96] "Inter-View"                                                           
##  [97] "Introducing... Janet"                                                 
##  [98] "Invitation to Monte Carlo"                                            
##  [99] "It's a Bird"                                                          
## [100] "Italianamerican"                                                      
## [101] "Ivan Groznyy III"                                                     
## [102] "Ixe"                                                                  
## [103] "Jay Silverheels: The Man Behind the Mask"                             
## [104] "Jimi Plays Monterey"                                                  
## [105] "Joshua Tree"                                                          
## [106] "Juhannustarinoita"                                                    
## [107] "Kisangany Diary"                                                      
## [108] "Konkurs"                                                              
## [109] "Krtek a koberec"                                                      
## [110] "Krtek ve snu"                                                         
## [111] "Ladies and Gentlemen, Mr. Leonard Cohen"                              
## [112] "Ladyporn"                                                             
## [113] "Leaving Jerusalem by Railway"                                         
## [114] "Legend of the Boy and the Eagle, The"                                 
## [115] "Lemonade Stories"                                                     
## [116] "Leo the Lion"                                                         
## [117] "Let My Puppets Come"                                                  
## [118] "Let's Sing along with Popeye"                                         
## [119] "Leven na mevrouw de Nijs, Het"                                        
## [120] "Looking for Langston"                                                 
## [121] "Love"                                                                 
## [122] "Love Story: Berlin 1942"                                              
## [123] "Love's Prisoner"                                                      
## [124] "Mabel's Wilful Way"                                                   
## [125] "Man in the Bath, The"                                                 
## [126] "Massacre, The"                                                        
## [127] "Matokuningas"                                                         
## [128] "Matrimaniac, The"                                                     
## [129] "Memphis Belle: A Story of a Flying Fortress, The"                     
## [130] "Mer dare"                                                             
## [131] "Michael Jordan to the Max"                                            
## [132] "Mickey and the Beanstalk"                                             
## [133] "Middletown"                                                           
## [134] "Mighty Times: The Children's March"                                   
## [135] "Miss Polly"                                                           
## [136] "Monaco Forever"                                                       
## [137] "Mond im Skorpion"                                                     
## [138] "Monsters Crash the Pajama Party"                                      
## [139] "Moochie of Pop Warner Football"                                       
## [140] "Motel, The"                                                           
## [141] "Mountain of Fire: The Search for the True Mount Sinai"                
## [142] "Mr. Edison at Work in His Chemical Laboratory"                        
## [143] "Mr. Soogy"                                                            
## [144] "Murder on the Waterfront"                                             
## [145] "My Childhood"                                                         
## [146] "Nasu: Andalusia no natsu"                                             
## [147] "Natural Disasters: Forces of Nature"                                  
## [148] "Never a Tender Moment"                                                
## [149] "Niagara"                                                              
## [150] "Nie wiem"                                                             
## [151] "Noche de duendes"                                                     
## [152] "O.J. Simpson: Juice on the Loose"                                     
## [153] "Ode"                                                                  
## [154] "Old Place, The"                                                       
## [155] "One of Them"                                                          
## [156] "Out of Our Dens: The Richard and the Young Lions Story"               
## [157] "Out of the Closet, Off the Screen: The Life of William Haines"        
## [158] "Overtime"                                                             
## [159] "Pair of Boots, A"                                                     
## [160] "Passion of Martin, The"                                               
## [161] "Penis"                                                                
## [162] "Penny and the Pownall Case"                                           
## [163] "Petite vendeuse de soleil, La"                                        
## [164] "Piece of Cake, A"                                                     
## [165] "Pilgrim, The"                                                         
## [166] "Plank, The"                                                           
## [167] "Point, The"                                                           
## [168] "Pretty Boys"                                                          
## [169] "Rain, Drizzle, and Fog"                                               
## [170] "Repair Shop, The"                                                     
## [171] "Report from the Aleutians"                                            
## [172] "Rest of My Life, The"                                                 
## [173] "Richard Pryor: Live and Smokin'"                                      
## [174] "Rien que les heures"                                                  
## [175] "Rough for Theatre II"                                                 
## [176] "Run, Appaloosa, Run"                                                  
## [177] "Sailor-Made Man, A"                                                   
## [178] "Send Me an Angel"                                                     
## [179] "Sex Rituals of the Occult"                                            
## [180] "She Would Be an Actress"                                              
## [181] "Short Film About Decalogue: An Interview with Krzysztof Kieslowski, A"
## [182] "Shoulder Arms"                                                        
## [183] "Shvitz, The"                                                          
## [184] "Side/Walk/Shuttle"                                                    
## [185] "Sink or Swim"                                                         
## [186] "Skin Too Few: The Days of Nick Drake, A"                              
## [187] "So Many Miracles"                                                     
## [188] "Solstice"                                                             
## [189] "Space Station 3D"                                                     
## [190] "Special London Bridge Special, The"                                   
## [191] "Spelen of sterven"                                                    
## [192] "Star Trek: New Voyages"                                               
## [193] "Stolen Honor: Wounds That Never Heal"                                 
## [194] "Strange Adventure of New York Drummer"                                
## [195] "Sumerki zhenskoi dushi"                                               
## [196] "T-Rex: Back to the Cretaceous"                                        
## [197] "Target for Tonight"                                                   
## [198] "Tayna tretey planety"                                                 
## [199] "These Hands"                                                          
## [200] "Thriller"                                                             
## [201] "Tokyo no onna"                                                        
## [202] "Tribulation 99: Alien Anomalies Under America"                        
## [203] "Tunnel Workers, The"                                                  
## [204] "Twice a Man"                                                          
## [205] "Tzel, Shel Hachiuch Shelchah, Ha-"                                    
## [206] "USS VD: Ship of Shame"                                                
## [207] "Undo"                                                                 
## [208] "Unknown, The"                                                         
## [209] "VIP: Very Important Pinoys"                                           
## [210] "Ved verdens ende"                                                     
## [211] "Versailles Rive-Gauche"                                               
## [212] "Vertical Features Remake"                                             
## [213] "Viena"                                                                
## [214] "Vignette: Para Sight"                                                 
## [215] "War Game, The"                                                        
## [216] "Wave Twisters"                                                        
## [217] "Wavelength"                                                           
## [218] "Wheels Locked"                                                        
## [219] "White Pongo"                                                          
## [220] "White Room, The"                                                      
## [221] "Wild and Woolly"                                                      
## [222] "Young Black Stallion, The"                                            
## [223] "Zashchitnik Sedov"                                                    
## [224] "Zyciorys"
#de las que se clasifican como cortas y son larga:
mals<-movies%>%filter(Short==1 & length>=50)
mals$title
##  [1] "10 jaar leuven kort"                             
##  [2] "16 December"                                     
##  [3] "Ataque verbal"                                   
##  [4] "Balettprimadonnan"                               
##  [5] "Cielo gira, El"                                  
##  [6] "Commies Are Coming, the Commies Are Coming, The" 
##  [7] "December 7th"                                    
##  [8] "Do You Remember Laurie Zimmer?"                  
##  [9] "Doggy Poo"                                       
## [10] "Ellie Parker"                                    
## [11] "Filmmaker"                                       
## [12] "Ghost Goes Gear, The"                            
## [13] "Hopnick"                                         
## [14] "Inamorata"                                       
## [15] "Legong: Dance of the Virgins"                    
## [16] "Morten Korch - Solskin kan man altid finde"      
## [17] "Napoleon Blown-Aparte"                           
## [18] "Now or Never"                                    
## [19] "Only Once"                                       
## [20] "Red Trousers: The Life of the Hong Kong Stuntmen"
## [21] "Return of Draw Egan, The"                        
## [22] "Spin"                                            
## [23] "Tenshi no tamago"                                
## [24] "Thundering 8th, The"                             
## [25] "Walk Don't Walk"                                 
## [26] "Zaboravljeni"

Ejercicio 2

ggplot(movies, aes(x=votes, y=rating)) +
  geom_point() +
  ylim(1,10) + scale_x_continuous(label=comma) + theme_bw()

a.Agrega alpha-blending ¿Qué pasa con los outliers? ¿Diferentes valores funcionan mejor?

#usamos alpha y vemos que mientras mas pequena  los outliers se ven notan claros ya que estan fuera del grueso de los datos(lo mas negro)
ggplot(movies, aes(x=votes, y=rating)) +
  geom_point(alpha=.1) +
  ylim(1,10) + scale_x_continuous(label=comma) + theme_bw()

b.¿Cómo se ve la gráfica si remueves las películas con menos de 100 votos?

ggplot(movies%>%filter(votes>=100), aes(x=votes, y=rating)) +
  geom_point(alpha=.1) +
  ylim(1,10) + scale_x_continuous(label=comma) + theme_bw()

  #la diferenca es que con este filtro podemos ver que estas peliculas que quitamos tenian ratings altos 9 o 10 en esta grafica ya no hay estos ratings...

c.¿Cómo se ve la gráfica si remueves todas las películas que tienen un rating arriba de 9?

ggplot(movies%>%filter(rating>=9), aes(x=votes, y=rating)) +
  geom_point(alpha=.1) +
  ylim(1,10) 

  scale_x_continuous(label=comma) + 
  theme_bw()
## NULL
  #parece que las peliculas con alto rating tienen pocos votos!

Ejercicio 3

data("Cars93")

ggplot(Cars93, aes(x=Weight, y=MPG.city)) +
  geom_smooth(colour="green") +
  ylim(0, 50) + 
  scale_x_continuous(label=comma) +
  geom_point() +
  theme_bw() 
## `geom_smooth()` using method = 'loess'

a.¿Cuál es el outlier de la izquierda?

#haciendo una grafica de caja y brazos podemos ver que ese valor es 46...
ggplotly(ggplot(data=Cars93,aes(x='var',y=MPG.city))+geom_boxplot())
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`

b.En muchos países en lugar de medirse en millas por galón, se mide en litros por 100 km. ¿Qué pasa si graficas MPG.city contra Horsepower? ¿Existe una relación lineal? ¿Cuáles son los outliers?

#Parece lineal negativa pero no ajusta del todo y su coeficiente de correlacion es de -.67, 
ggplot(data=Cars93,aes(x=MPG.city*0.0042514285,y=Horsepower))+geom_point()+geom_smooth(method='lm',formula=y~x)

#coeficiente de correlacion..
cor(Cars93$MPG.city,Cars93$Horsepower)
## [1] -0.6726362

Los outliers son valores de Horsepower arriba de 255…

ggplotly(ggplot(data=Cars93,aes(x='var',y=Horsepower))+geom_boxplot())
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`

Los outliers son valores de MPG.city en litros por 100km son valores arriba de 0.14

ggplotly(ggplot(data=Cars93,aes(x='var',y=MPG.city*0.0042514285))+geom_boxplot())
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`

También se podemos hacer una matriz de scatterplots -splom (como lo hicimos con los histogramas :)), para ello ocupamos el método ggpairs de la librería GGally en el dataset de precios de vivienda ade Boston.

library(GGally)
## 
## Attaching package: 'GGally'
## The following object is masked from 'package:dplyr':
## 
##     nasa
dplyr::select(Boston, -rad, -chas) %>% 
ggpairs(title="Boston dataset", diag=list(continuous="density", axisLabels='none'))
## Warning in check_and_set_ggpairs_defaults("diag", diag, continuous =
## "densityDiag", : Changing diag$continuous from 'density' to 'densityDiag'

Ejercicio 4 a.¿Cuáles están positivamente correlacionados con medv? zn.-.36, rm.-.695,dis.-.25,black-.333

b.La variable crim -tasa de crímenes per cápita- tiene scatterplots con forma inusual, donde los valores altos de crim solo ocurren para un valor de la otra variable ¿Qué explicación le puedes dar?

new<-dplyr::select(Boston, crim,zn,indus,age,dis,tax,rm,medv)
ggplot(data=new,aes(x=crim,y=zn))+geom_point()

ggplot(data=new,aes(x=crim,y=indus))+geom_point()

De acuerdo con estas gráficas lo que podemos decir es que las variables condicionan los casos en donde existen crímenes, por ejemplo el caso zn los crímenes sólo se dan donde zn es cero y para el caso de indus sólo se dan para valores cerca de 20 entonces pareciera que los datos de crimen sólo existieran para estos valores o sólo se censaron estos.

c.Hay varias formas en los scatterplots, escoge 5 y explica cómo las interpretas

new<-dplyr::select(Boston,crim,zn,indus,age,dis,tax,rm,medv)

#crim vs. zn como habíamos explicado parece que sólo para u valor varían las tasas de crímenes el resto iene tasa 0 de crímenes.
ggplot(data=new,aes(x=crim,y=zn))+geom_point()

#indus vs. age exise una correlación positiva marcada a mayor indus( proportion of non-retail business acres per town.) mayor age(proportion of owner-occupied units built prior to 1940) pero con muchos ajustes no es tan clara esta relación
ggplot(data=new,aes(x=indus,y=age))+geom_point()

#dis vs. tax parece que tax tiene mayor variabilidad para tax menor que 500 pdieran ser los tax mayores a este valor outliers.
ggplot(data=new,aes(x=dis,y=tax))+geom_point()

#rm vs. medv relación claramente positiva a mayor average number of rooms per dwelling mayor Median value of owner-occupied homes in $1000's
ggplot(data=new,aes(x=rm,y=medv))+geom_point()

#rm vs. age variables poco relacionadas parecieran ser indpendientes o tener poca injerencia una en la otra tiene sentido que el numero de habitaciones por vivienda no se relacione con la proporcion de unidades ocupadas por el dueño antes de 1940.
ggplot(data=new,aes(x=rm,y=age))+geom_point()

Ejercicio 5 a.Usando el dataset Boston realiza un pcp, intenta resaltar las características que haz observado en los ejercicios anteriores. Piensa cómo le hiciste…

#veamos la distribución de las tasas de crímenes en boston..
summary(Boston$crim)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##  0.00632  0.08204  0.25650  3.61400  3.67700 88.98000
#observamos que más del 50% de ls datos tiene  tasasmenores al 1% por tanto vamos a definir las tasas de críemenes como mucho>1% y poco lo contrario...
newb<-Boston%>%mutate(cr=ifelse(crim<1,'poco','mucho'))

#hacemos el pcp con las variables del ejercicio anterior...
ggparcoord(newb, columns=c(2:3,6:8,10,14), groupColumn = "cr") 

Podemos corroborar en 1 sola gráfica que los casos donde hay mucho crimen s donde los valores para rn son bajos, al igual que para medv,dis,zn y para indus y ta e crimen es mayor con valores altos…